Code
library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)library(tidyverse)
library(easystats)
library(patchwork)
library(ggside)df <- read.csv("../data/rawdata_participants.csv") |>
mutate(across(everything(), ~ifelse(.x == "", NA, .x))) |>
mutate(Experimenter = case_when(
Language=="English" & Experimenter %in% c("reddit7", "reddit8", "reddit1", "reddit2", "reddit5") ~ "Reddit (other)",
.default = Experimenter
))
dftask <- read.csv("../data/rawdata_task.csv") |>
full_join(
df[c("Participant", "Sex", "SexualOrientation")],
by = join_by(Participant)
)The initial sample consisted of 723 participants (Mean age = 30.2, SD = 12.1, range: [18, 80], 0.1% missing; Sex: 40.4% females, 58.5% males, 1.1% other; Education: Bachelor, 34.30%; Doctorate, 5.12%; High School, 41.08%; Master, 16.87%; Other, 2.21%; Primary School, 0.41%; Country: 28.49% UK, 16.60% USA, 14.66% Colombia, 40.25% other).
# Create Sexual "relevance" variable (Relevant, irrelevant, non-erotic)
dftask <- dftask |>
mutate(Relevance = case_when(
Type == "Non-erotic" ~ "Non-erotic",
Sex == "Male" & SexualOrientation == "Heterosexual" & Category == "Female" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Heterosexual" & Category == "Male" ~ "Relevant",
Sex == "Male" & SexualOrientation == "Homosexual" & Category == "Male" ~ "Relevant",
Sex == "Female" & SexualOrientation == "Homosexual" & Category == "Female" ~ "Relevant",
# TODO: what to do with "Other"?
SexualOrientation %in% c("Bisexual", "Other") & Category %in% c("Male", "Female") ~ "Relevant",
.default = "Irrelevant"
)) plot_recruitement <- function(df) {
# Consecutive count of participants per day (as area)
df |>
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) |>
group_by(Date, Language, Experimenter) |>
summarize(N = n()) |>
ungroup() |>
# https://bocoup.com/blog/padding-time-series-with-r
complete(Date, Language, Experimenter, fill = list(N = 0)) |>
group_by(Language, Experimenter) |>
mutate(N = cumsum(N)) |>
ggplot(aes(x = Date, y = N)) +
geom_area(aes(fill=Experimenter)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Recruitment History",
x = "Date",
y = "Total Number of Participants"
) +
see::theme_modern()
}
plot_recruitement(df) +
facet_wrap(~Language, nrow=5, scales = "free_y")# Table
table_recruitment <- function(df) {
summarize(df, N = n(), .by=c("Language", "Experimenter")) |>
arrange(desc(N)) |>
gt::gt() |>
gt::opt_stylize() |>
gt::opt_interactive(use_compact_mode = TRUE) |>
gt::tab_header("Number of participants per recruitment source")
}
table_recruitment(df)plot_recruitement(filter(df, Language == "English"))table_recruitment(filter(df, Language == "English"))plot_recruitement(filter(df, Language == "French"))table_recruitment(filter(df, Language == "French"))plot_recruitement(filter(df, Language == "Italian"))table_recruitment(filter(df, Language == "Italian"))plot_recruitement(filter(df, Language == "Colombian"))table_recruitment(filter(df, Language == "Colombian"))plot_recruitement(filter(df, Language == "Spanish"))table_recruitment(filter(df, Language == "Spanish"))The majority of participants found the study to be a “fun” experience. Interestingly, reports of “fun” were significantly associated with finding at least some stimuli arousing. Conversely, reporting “no feelings” was associated with finding the experiment “boring”.
df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
pivot_longer(everything(), names_to = "Question", values_to = "Answer") |>
group_by(Question, Answer) |>
summarise(prop = n()/nrow(df), .groups = 'drop') |>
complete(Question, Answer, fill = list(prop = 0)) |>
filter(Answer == "True") |>
mutate(Question = str_remove(Question, "Feedback_"),
Question = str_replace(Question, "AILessArousing", "AI = Less arousing"),
Question = str_replace(Question, "AIMoreArousing", "AI = More arousing"),
Question = str_replace(Question, "CouldNotDiscriminate", "Hard to discriminate"),
Question = str_replace(Question, "LabelsIncorrect", "Labels were incorrect"),
Question = str_replace(Question, "NoFeels", "Didn't feel anything"),
Question = str_replace(Question, "CouldDiscriminate", "Easy to discriminate"),
Question = str_replace(Question, "LabelsReversed", "Labels were reversed")) |>
mutate(Question = fct_reorder(Question, desc(prop))) |>
ggplot(aes(x = Question, y = prop)) +
geom_bar(stat = "identity") +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks(), labels=scales::percent) +
labs(x="Feedback", y = "Participants", title = "Feedback") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank()
)cor <- df |>
select(starts_with("Feedback"), -Feedback_Comments) |>
mutate_all(~ifelse(.=="True", 1, 0)) |>
correlation(method="tetrachoric", redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()
cor |>
mutate(val = paste0(insight::format_value(rho), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, \(x) str_remove_all(x, "Feedback_")),
Parameter2 = fct_relabel(Parameter2, \(x) str_remove_all(x, "Feedback_"))) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = rho), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Feedback Co-occurence Matrix") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))outliers <- c(
# "S206" # Collapsed RTs in both phases
# "S399" # Negative Arousal-Valence correlations
)
potentials <- list()df |>
ggplot(aes(x=Mobile, fill=Language)) +
geom_bar() +
geom_hline(yintercept=0.5*nrow(df), linetype="dashed") +
theme_modern()We removed 193 participants that participated with a mobile device.
df <- filter(df, Mobile == "False")
dftask <- filter(dftask, Participant %in% df$Participant)The experiment’s median duration is 24.99 min (50% CI [20.02, 27.69]).
df |>
mutate(Participant = fct_reorder(Participant, Experiment_Duration),
Category = ifelse(Experiment_Duration > 60, "extra", "ok"),
Duration = ifelse(Experiment_Duration > 60, 60, Experiment_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$Experiment_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#4CAF50", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Experiment Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())potentials$expe_duration <- arrange(df, Experiment_Duration) |>
select(Participant, Experiment_Duration) |>
head(5) plot_hist <- function(dat) {
dens <- rbind(
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT1),
Phase="Emotional ratings",
y = y / max(y)),
mutate(bayestestR::estimate_density(filter(dftask, RT1 < 40 & RT2 < 40)$RT2),
Phase="Reality rating",
y = y / max(y))
)
dat |>
filter(RT1 < 40 & RT2 < 40) |> # Remove very long RTs
# mutate(Participant = fct_reorder(Participant, RT1)) |>
pivot_longer(cols = c(RT1, RT2), names_to = "Phase", values_to = "RT") |>
mutate(Phase = ifelse(Phase == "RT1", "Emotional ratings", "Reality rating")) |>
ggplot(aes(x=RT)) +
geom_area(data=dens, aes(x=x, y=y, fill=Phase), alpha=0.33, position="identity") +
geom_density(aes(color=Phase, y=after_stat(scaled)), linewidth=1.5) +
scale_x_sqrt(breaks=c(0, 2, 5, 10, 20)) +
theme_minimal() +
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.line.y = element_blank()) +
labs(title = "Distribution of Response Time for each Participant", x="Response time per stimuli (s)") +
facet_wrap(~Participant, ncol=5, scales="free_y") +
coord_cartesian(xlim = c(0, 25))
}df |>
mutate(Participant = fct_reorder(Participant, BAIT_Duration),
Category = ifelse(BAIT_Duration > 5, "extra", "ok"),
Duration = ifelse(BAIT_Duration > 5, 5, BAIT_Duration),
Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(y = Participant, x = Duration)) +
geom_point(aes(color = Group, shape = Category)) +
geom_vline(xintercept = median(df$BAIT_Duration), color = "red", linetype = "dashed") +
scale_shape_manual(values = c("extra" = 3, ok = 19)) +
scale_color_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
guides(color = "none", shape = "none") +
ggside::geom_xsidedensity(fill = "#9C27B0", color=NA) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
labs(
title = "Questionnaire Completion Time",
x = "Duration (in minutes)",
y = "Participant"
) +
theme_bw() +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .3,
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) dat <- dftask |>
filter(Relevance %in% c("Relevant", "Non-erotic")) |>
group_by(Participant, Type) |>
summarise(Arousal = mean(Arousal),
Valence = mean(Valence),
Enticement = mean(Enticement),
.groups = "drop") |>
pivot_wider(names_from = Type, values_from = all_of(c("Arousal", "Valence", "Enticement"))) |>
transmute(Participant = Participant,
Arousal = Arousal_Erotic - `Arousal_Non-erotic`,
Valence = Valence_Erotic - `Valence_Non-erotic`,
Enticement = Enticement_Erotic - `Enticement_Non-erotic`) |>
filter(!is.na(Arousal)) |>
mutate(Participant = fct_reorder(Participant, Arousal))
dat |>
pivot_longer(-Participant) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
ggplot(aes(x=value, y=Participant, fill=Group)) +
geom_bar(aes(fill=value), stat = "identity") +
scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_diff <- arrange(dat, Arousal) |>
head(5)# Single arousal response (0)
outliers <- summarize(dftask, n = length(unique(Arousal)), .by="Participant") |>
filter(n == 1) |>
select(Participant) |>
pull() |>
c(outliers)
dat <- dftask |>
filter(!Participant %in% outliers) |>
summarize(cor_ArVal = cor(Arousal, Valence),
cor_ArEnt = cor(Arousal, Enticement),
.by="Participant")
dat |>
left_join(df[c("Participant", "Language")], by="Participant") |>
mutate(Participant = fct_reorder(Participant, cor_ArVal)) |>
pivot_longer(starts_with("cor_")) |>
mutate(Group = ifelse(Participant %in% outliers, "Outlier", "ok")) |>
mutate(name = fct_relevel(name, "cor_ArVal", "cor_ArEnt"),
name = fct_recode(name, "Arousal - Valence" = "cor_ArVal", "Arousal - Enticement" = "cor_ArEnt")) |>
ggplot(aes(y = Participant, x = value)) +
geom_bar(aes(fill = Language), stat = "identity") +
# scale_fill_gradient2(low = "#3F51B5", mid = "#FF9800", high = "#4CAF50", midpoint = 0) +
# scale_fill_manual(values = c("Outlier" = "red", ok = "black"), guide="none") +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(title = "Difference between Erotic and Neutral", x="Erotic - Neutral") +
facet_wrap(~name, ncol=3, scales="free_x")potentials$emo_cor <- arrange(dat, cor_ArVal) |>
head(5)We removed 7 that showed no variation in their arousal response.
# c(as.character(potentials$expe_duration$Participant),
# as.character(potentials$emo_diff$Participant),
# as.character(potentials$emo_cor$Participant)) |>
# table()
#
df <- filter(df, !Participant %in% outliers)
dftask <- filter(dftask, !Participant %in% outliers)df |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = SexualOrientation)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_metro_d() +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Sexual Orientation", fill = "Sexual Orientation") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)We removed 23 participants that were incompatible with further analysis.
df <- filter(df, Sex != "Other" & SexualOrientation != "Other")
dftask <- filter(dftask, Participant %in% df$Participant)show_distribution <- function(dftask, target="Arousal") {
dftask |>
filter(SexualOrientation %in% c("Heterosexual", "Bisexual", "Homosexual")) |>
bayestestR::estimate_density(select=target,
at=c("Relevance", "Category", "Sex", "SexualOrientation"),
method="KernSmooth") |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Relevance, linetype = Category), linewidth=1) +
facet_grid(SexualOrientation~Sex, scales="free_y") +
scale_color_manual(values = c("Relevant" = "red", "Non-erotic" = "blue", "Irrelevant"="darkorange")) +
scale_y_continuous(expand = c(0, 0)) +
scale_x_continuous(expand = c(0, 0)) +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(face="bold")) +
labs(title = target)
}
(show_distribution(dftask, "Arousal") | show_distribution(dftask, "Valence")) /
(show_distribution(dftask, "Enticement") | show_distribution(dftask, "Realness")) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(title = "Distribution of Appraisals depending on the Sexual Profile",
theme = theme(plot.title = element_text(hjust = 0.5, face="bold"))) We kept only heterosexual participants (70.80%).
df <- filter(df, SexualOrientation == "Heterosexual")
dftask <- filter(dftask, Participant %in% df$Participant)df <- filter(df, !Participant %in% outliers)
dftask <- filter(dftask, !Participant %in% outliers)The final sample includes 354 participants (Mean age = 32.0, SD = 13.1, range: [18, 80], 0.3% missing; Sex: 37.3% females, 62.7% males, 0.0% other; Education: Bachelor, 34.75%; Doctorate, 6.50%; High School, 35.59%; Master, 20.34%; Other, 2.54%; Primary School, 0.28%; Country: 24.58% UK, 21.19% Colombia, 15.54% USA, 11.02% France, 27.68% other).
p_country <- dplyr::select(df, region = Country) |>
group_by(region) |>
summarize(n = n()) |>
right_join(map_data("world"), by = "region") |>
ggplot(aes(long, lat, group = group)) +
geom_polygon(aes(fill = n)) +
scale_fill_gradientn(colors = c("#FFEB3B", "red", "purple")) +
labs(fill = "N") +
theme_void() +
labs(title = "A Geographically Diverse Sample", subtitle = "Number of participants by country") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2))
)
p_countryggwaffle::waffle_iron(df, ggwaffle::aes_d(group = Ethnicity), rows=10) |>
ggplot(aes(x, y, fill = group)) +
ggwaffle::geom_waffle() +
coord_equal() +
scale_fill_flat_d() +
ggwaffle::theme_waffle() +
labs(title = "Self-reported Ethnicity", subtitle = "Each square represents a participant", fill="") +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2)),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
p_age <- estimate_density(df$Age) |>
normalize(select = y) |>
mutate(y = y * 86) |> # To match the binwidth
ggplot(aes(x = x)) +
geom_histogram(data=df, aes(x = Age), fill = "#616161", bins=28) +
# geom_line(aes(y = y), color = "orange", linewidth=2) +
geom_vline(xintercept = mean(df$Age), color = "red", linewidth=1.5) +
# geom_label(data = data.frame(x = mean(df$Age) * 1.15, y = 0.95 * 75), aes(y = y), color = "red", label = paste0("Mean = ", format_value(mean(df$Age)))) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(title = "Age", y = "Number of Participants", color = NULL, subtitle = "Distribution of participants' age") +
theme_modern(axis.title.space = 10) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_ageWarning: Removed 1 rows containing non-finite values (`stat_bin()`).
Warning: Removed 1 rows containing missing values (`geom_vline()`).
p_edu <- df |>
mutate(Education = fct_relevel(Education, "Other", "Primary School", "High School", "Bachelor", "Master", "Doctorate")) |>
ggplot(aes(x = Education)) +
geom_bar(aes(fill = Education)) +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_viridis_d(guide = "none") +
labs(title = "Education", y = "Number of Participants", subtitle = "Participants per achieved education level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_educolors <- c(
"NA" = "#2196F3", "None" = "#E91E63", "Condoms (for partner)" = "#9C27B0",
"Combined pills" = "#FF9800", "Intrauterine Device (IUD)" = "#FF5722",
"Intrauterine System (IUS)" = "#795548", "Progestogen pills" = "#4CAF50",
"Other" = "#FFC107", "Condoms (female)" = "#607D8B"
)
colors <- colors[names(colors) %in% c("NA", df$BirthControl)]
p_sex <- df |>
mutate(BirthControl = ifelse(Sex == "Male", "NA", BirthControl),
BirthControl = fct_relevel(BirthControl, names(colors))) |>
ggplot(aes(x = Sex)) +
geom_bar(aes(fill = BirthControl)) +
scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks()) +
scale_fill_manual(
values = colors,
breaks = names(colors)[2:length(colors)]
) +
labs(x = "Biological Sex", y = "Number of Participants", title = "Sex and Birth Control Method", fill = "Birth Control") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1)),
axis.title.x = element_blank()
)
p_sexp_sexprofile <- df |>
select(Participant, Sex, SexualOrientation, SexualActivity, COPS_Duration_1, COPS_Frequency_2) |>
pivot_longer(-all_of(c("Participant", "Sex"))) |>
mutate(name = str_replace_all(name, "SexualOrientation", "Sexual Orientation"),
name = str_replace_all(name, "SexualActivity", "Sexual Activity"),
name = str_replace_all(name, "COPS_Duration_1", "Pornography Usage (Duration)"),
name = str_replace_all(name, "COPS_Frequency_2", "Pornography Usage (Frequency)")) |>
ggplot(aes(x = value, fill=Sex)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
scale_fill_manual(values = c("Male"= "#64B5F6", "Female"= "#F06292")) +
labs(title = "Sexual Profile of Participants") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1), angle = 45, hjust = 1),
axis.title.x = element_blank(),
axis.title.y = element_blank()
) +
facet_wrap(~name, scales = "free")
p_sexprofilep_language <- df |>
ggplot(aes(x = Language_Level)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "Language Level") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_expertise <- df |>
ggplot(aes(x = AI_Knowledge)) +
geom_bar() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = "Level", y = "Number of Participants", title = "AI-Expertise") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)
p_language | p_expertisedf$Screen_Size <- sqrt(df$Screen_Width * df$Screen_Height)
df |>
ggplot(aes(x = Screen_Size)) +
geom_histogram() +
scale_y_continuous(expand = c(0, 0), breaks= scales::pretty_breaks()) +
labs(x = expression("Screen Size ("~sqrt(Number~of~Pixels)~")"), y = "Number of Participants", title = "Screen Size") +
theme_modern(axis.title.space = 15) +
theme(
plot.title = element_text(size = rel(1.2), face = "bold", hjust = 0),
plot.subtitle = element_text(size = rel(1.2), vjust = 7),
axis.text.y = element_text(size = rel(1.1)),
axis.text.x = element_text(size = rel(1.1))
)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p_country /
(p_age + p_edu)Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
Warning: Removed 1 rows containing missing values (`geom_vline()`).
This section pertains to the validation of the BAIT scale measuring beliefs and expectations about artificial creations.
bait <- df |>
select(starts_with("BAIT_"), -BAIT_Duration) |>
rename_with(function(x) gsub("BAIT_\\d_", "", x))
cor <- correlation::correlation(bait, redundant = TRUE) |>
correlation::cor_sort() |>
correlation::cor_lower()
clean_labels <- function(x) {
x <- str_remove_all(x, "BAIT_") |>
str_replace_all("_", " - ")
}
cor |>
mutate(val = paste0(insight::format_value(r), format_p(p, stars_only=TRUE))) |>
mutate(Parameter2 = fct_rev(Parameter2)) |>
mutate(Parameter1 = fct_relabel(Parameter1, clean_labels),
Parameter2 = fct_relabel(Parameter2, clean_labels)) |>
ggplot(aes(x=Parameter1, y=Parameter2)) +
geom_tile(aes(fill = r), color = "white") +
geom_text(aes(label = val), size = 3) +
labs(title = "Correlation Matrix",
subtitle = "Beliefs about Artificial Information Technology (BAIT)") +
scale_fill_gradient2(
low = "#2196F3",
mid = "white",
high = "#F44336",
breaks = c(-1, 0, 1),
guide = guide_colourbar(ticks=FALSE),
midpoint = 0,
na.value = "grey85",
limit = c(-1, 1)) +
theme_minimal() +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))n <- parameters::n_factors(bait, package = "nFactors")
plot(n)efa <- parameters::factor_analysis(bait, cor=cor(bait), n=3, rotation = "oblimin",
sort=TRUE, scores="tenBerge", fm="ml")
plot(efa)display(efa)| Variable | ML2 | ML1 | ML3 | Complexity | Uniqueness |
|---|---|---|---|---|---|
| EnvironmentReal | 0.67 | 0.02 | -0.04 | 1.01 | 0.53 |
| VideosIssues | 0.58 | -0.20 | 0.19 | 1.47 | 0.59 |
| ImagesRealistic | 0.54 | 0.07 | -0.14 | 1.16 | 0.67 |
| ImitatingReality | 0.53 | 0.03 | -0.18 | 1.23 | 0.63 |
| VideosRealistic | -4.22e-03 | 0.99 | 0.02 | 1.00 | 5.00e-03 |
| TextIssues | 0.08 | 0.06 | 0.66 | 1.05 | 0.58 |
| TextRealistic | 0.16 | -2.62e-03 | -0.66 | 1.12 | 0.47 |
| ImagesIssues | 0.03 | 0.26 | 0.30 | 1.97 | 0.83 |
The 3 latent factors (oblimin rotation) accounted for 46.00% of the total variance of the original data (ML2 = 18.20%, ML1 = 14.10%, ML3 = 13.70%).
m1 <- lavaan::cfa(
"G =~ ImitatingReality + EnvironmentReal + VideosIssues + TextIssues + VideosRealistic + ImagesRealistic + ImagesIssues + TextRealistic",
data=bait)
m2 <- lavaan::cfa(
"Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues + VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m3 <- lavaan::cfa(
"Images =~ ImitatingReality + EnvironmentReal + ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m4 <- lavaan::cfa(
"Environment =~ ImitatingReality + EnvironmentReal
Images =~ ImagesRealistic + ImagesIssues
Videos =~ VideosIssues + VideosRealistic
Text =~ TextIssues + TextRealistic",
data=bait)
m5 <- lavaan::cfa(efa_to_cfa(efa, threshold="max"), data=bait)
# bayestestR::bayesfactor_models(m1, m2)
lavaan::anova(m1, m2, m3, m4, m5)Warning in lavTestLRT(object = object, ..., model.names = NAMES): lavaan WARNING:
Some restricted models fit better than less restricted models;
either these models are not nested, or the less restricted model
failed to reach a global optimum. Smallest difference =
-1.41502984149973
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
m4 14 -193.99 -108.867 71.040
m3 17 -190.59 -117.068 80.446 9.406 0.07767 3 0.02435 *
m5 18 -162.90 -93.257 110.126 29.680 0.28464 1 5.095e-08 ***
m2 19 -166.32 -100.541 108.712 -1.415 0.00000 1 1.00000
m1 20 -110.64 -48.732 166.390 57.679 0.40014 1 3.086e-14 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(parameters::parameters(m3, standardize = TRUE))| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| Images =~ ImitatingReality | 0.62 | 0.04 | (0.53, 0.71) | 13.84 | < .001 |
| Images =~ EnvironmentReal | 0.64 | 0.04 | (0.55, 0.73) | 14.54 | < .001 |
| Images =~ ImagesRealistic | 0.59 | 0.05 | (0.50, 0.68) | 12.73 | < .001 |
| Images =~ ImagesIssues | -0.27 | 0.06 | (-0.38, -0.15) | -4.58 | < .001 |
| Videos =~ VideosIssues | 0.77 | 0.07 | (0.63, 0.91) | 10.77 | < .001 |
| Videos =~ VideosRealistic | -0.49 | 0.06 | (-0.60, -0.37) | -8.22 | < .001 |
| Text =~ TextIssues | 0.50 | 0.06 | (0.37, 0.62) | 7.78 | < .001 |
| Text =~ TextRealistic | -0.93 | 0.09 | (-1.12, -0.75) | -9.97 | < .001 |
| Link | Coefficient | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| Images ~~ Videos | 0.68 | 0.07 | (0.53, 0.82) | 9.14 | < .001 |
| Images ~~ Text | -0.53 | 0.07 | (-0.68, -0.39) | -7.17 | < .001 |
| Videos ~~ Text | -0.18 | 0.07 | (-0.32, -0.04) | -2.45 | 0.014 |
Exploratory Graph Analysis (EGA) is a recently developed framework for psychometric assessment, that can be used to estimate the number of dimensions in questionnaire data using network estimation methods and community detection algorithms, and assess the stability of dimensions and items using bootstrapping.
Unique Variable Analysis (Christensen, Garrido, & Golino, 2023) uses the weighted topological overlap measure (Nowick et al., 2009) on an estimated network. Values greater than 0.25 are determined to have considerable local dependence (i.e., redundancy) that should be handled (variables with the highest maximum weighted topological overlap to all other variables (other than the one it is redundant with) should be removed).
uva <- EGAnet::UVA(data = bait, cut.off = 0.3)
uvaVariable pairs with wTO > 0.30 (large-to-very large redundancy)
node_i node_j wto
TextRealistic TextIssues 0.331
----
Variable pairs with wTO > 0.25 (moderate-to-large redundancy)
----
Variable pairs with wTO > 0.20 (small-to-moderate redundancy)
node_i node_j wto
ImitatingReality EnvironmentReal 0.210
VideosRealistic VideosIssues 0.208
uva$keep_remove$keep
[1] "TextIssues"
$remove
[1] "TextRealistic"
ega <- list()
for(model in c("glasso", "TMFG")) {
for(algo in c("walktrap", "louvain")) {
for(type in c("ega", "ega.fit", "riEGA")) { # "hierega"
if(type=="ega.fit" & algo=="louvain") next # Too slow
ega[[paste0(model, "_", algo, "_", type)]] <- EGAnet::bootEGA(
data = bait,
seed=123,
model=model,
algorithm=algo,
EGA.type=type,
type="resampling",
plot.typicalStructure=FALSE,
verbose=FALSE)
}
}
}The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
The random-intercept model converged. Wording effects likely. Results are only valid if data are [4;munrecoded[0m.
EGAnet::compare.EGA.plots(
ega$glasso_walktrap_ega, ega$glasso_walktrap_ega.fit,
ega$glasso_louvain_ega, ega$TMFG_louvain_ega,
ega$glasso_louvain_riEGA, ega$glasso_walktrap_riEGA,
ega$TMFG_walktrap_ega, ega$TMFG_walktrap_ega.fit,
ega$TMFG_louvain_riEGA, ega$TMFG_walktrap_riEGA,
labels=c("glasso_walktrap_ega", "glasso_walktrap_ega.fit",
"glasso_louvain_ega", "TMFG_louvain_ega",
"glasso_louvain_riEGA", "glasso_walktrap_riEGA",
"TMFG_walktrap_ega", "TMFG_walktrap_ega.fit",
"TMFG_louvain_riEGA", "TMFG_walktrap_riEGA"),
rows=5,
plot.all = FALSE)$allFigures shows how often each variable is replicating in their empirical structure across bootstraps.
patchwork::wrap_plots(lapply(ega, plot), nrow = 4)ega_final <- ega$glasso_walktrap_riEGA$EGA
plot(ega_final)ega_scores <- EGAnet::net.scores(bait, ega_final)$scores$std.scores |>
as.data.frame() |>
setNames(c("EGA_Image", "EGA_Videos", "EGA_Text")) # Merge with data
scores <- lavaan::predict(m3) |>
as.data.frame() |>
datawizard::data_addprefix("CFA_") |>
# data_rename(c("ML1", "ML2"), c("BAIT_SEM1", "BAIT_SEM2")) |>
cbind(ega_scores) |>
mutate(Participant = df$Participant)
scores$BAIT_Videos <- (bait$VideosRealistic + (1 - bait$VideosIssues)) / 2
scores$BAIT_Images <- (bait$ImagesRealistic + (1 - bait$ImagesIssues) + bait$ImitatingReality + bait$EnvironmentReal) / 4
scores$BAIT_Text <- (bait$TextRealistic + (1 - bait$TextIssues)) / 2
df <- merge(df, scores, by="Participant")We computed two type of general scores for the BAIT scale, an empirical score based on the average of observed data (of the most loading items) and a model-based score as predicted by the structural model. The first one gives equal weight to all items (and keeps the same [0-1] range), while the second one is based on the factor loadings and the covariance structure.
correlation::cor_test(scores, "BAIT_Images", "CFA_Images") |>
plot() +
ggside::geom_xsidedensity(aes(x=BAIT_Images), color="grey", linewidth=1) +
ggside::geom_ysidedensity(aes(y=CFA_Images), color="grey", linewidth=1) +
ggside::scale_xsidey_continuous(expand = c(0, 0)) +
ggside::scale_ysidex_continuous(expand = c(0, 0)) +
ggside::theme_ggside_void() +
theme(ggside.panel.scale = .1) While the two correlate substantially, they have different benefits. The empirical score has a more straightforward meaning and is more reproducible (as it is not based on a model fitted on a specific sample), the model-based score takes into account the relative importance of the contribution of each item to their factor.
table <- correlation::correlation(scores) |>
summary()
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | BAIT_Text | BAIT_Images | BAIT_Videos | EGA_Text | EGA_Videos | EGA_Image | CFA_Text | CFA_Videos |
|---|---|---|---|---|---|---|---|---|
| CFA_Images | 0.52*** | 0.91*** | -0.61*** | 0.17* | 0.26*** | 0.91*** | -0.62*** | 0.80*** |
| CFA_Videos | 0.18* | 0.62*** | -0.91*** | 0.31*** | 0.09 | 0.63*** | -0.23*** | |
| CFA_Text | -0.89*** | -0.46*** | 0.18* | 0.05 | -0.45*** | -0.46*** | ||
| EGA_Image | 0.38*** | 0.99*** | -0.41*** | 0.06 | 0.15* | |||
| EGA_Videos | 4.03e-03 | 0.14 | -0.05 | 0.03 | ||||
| EGA_Text | -0.08 | 0.04 | -8.53e-04 | |||||
| BAIT_Videos | -0.15* | -0.42*** | ||||||
| BAIT_Images | 0.38*** |
table <- correlation::correlation(
select(scores, starts_with("BAIT_")),
select(df, starts_with("GAAIS")),
bayesian=TRUE) |>
summary()
format(table) |>
datawizard::data_rename("Parameter", "Variables") |>
gt::gt() |>
gt::cols_align(align="center") |>
gt::tab_options(column_labels.font.weight="bold")| Variables | GAAIS_Positive_17 | GAAIS_Negative_9 | GAAIS_Positive_7 | GAAIS_Negative_10 | GAAIS_Negative_15 | GAAIS_Positive_12 |
|---|---|---|---|---|---|---|
| BAIT_Videos | 0.02 | -0.13** | 0.14** | -0.16*** | -0.22*** | 0.08 |
| BAIT_Images | 0.23*** | 0.16** | 0.16*** | -0.02 | 0.03 | 0.26*** |
| BAIT_Text | 0.25*** | 0.05 | 0.26*** | -0.15** | -0.13** | 0.20*** |
df |>
ggplot(aes(x=as.factor(AI_Knowledge), y=BAIT_Images)) +
geom_boxplot()# m <- betareg::betareg(BAIT ~ AI_Knowledge, data=df)
m <- lm(BAIT_Images ~ poly(AI_Knowledge, 2), data=df)
# m <- brms::brm(BAIT ~ mo(AI_Knowledge), data=df, algorithm = "meanfield")
# m <- brms::brm(BAIT ~ AI_Knowledge, data=dfsub, algorithm = "meanfield")
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(351) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.69 | 8.46e-03 | (0.68, 0.71) | 81.89 | < .001 |
| AI Knowledge (1st degree) | -0.13 | 0.16 | (-0.45, 0.18) | -0.84 | 0.399 |
| AI Knowledge (2nd degree) | 0.44 | 0.16 | (0.12, 0.75) | 2.75 | 0.006 |
marginaleffects::predictions(m, by=c("AI_Knowledge"), newdata = "marginalmeans") |>
as.data.frame() |>
ggplot(aes(x=AI_Knowledge, y=estimate)) +
geom_jitter2(data=df, aes(y=BAIT_Images), alpha=0.2, width=0.1) +
geom_line(aes(group=1), position = position_dodge(width=0.2)) +
geom_pointrange(aes(ymin = conf.low, ymax=conf.high), position = position_dodge(width=0.2)) +
theme_minimal() +
labs(x = "AI-Knowledge", y="BAIT Score")# m <- betareg::betareg(BAIT ~ Sex / Age, data=df, na.action=na.omit)
m <- lm(BAIT_Images ~ Sex / Age, data=df)
display(parameters::parameters(m))| Parameter | Coefficient | SE | 95% CI | t(349) | p |
|---|---|---|---|---|---|
| (Intercept) | 0.64 | 0.04 | (0.57, 0.72) | 16.59 | < .001 |
| Sex (Male) | 0.06 | 0.05 | (-0.04, 0.16) | 1.18 | 0.237 |
| Sex (Female) × Age | 2.67e-03 | 1.43e-03 | (-1.38e-04, 5.47e-03) | 1.87 | 0.062 |
| Sex (Male) × Age | -5.64e-04 | 8.08e-04 | (-2.15e-03, 1.03e-03) | -0.70 | 0.485 |
glm(Feedback_LabelsIncorrect ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_LabelsIncorrect = ifelse(Feedback_LabelsIncorrect=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are Incorrect'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.76 | 1.49 | (-3.71, 2.18) | -0.51 | 0.611 |
| BAIT Images | -0.38 | 2.03 | (-4.41, 3.58) | -0.19 | 0.852 |
| AI Knowledge | 0.45 | 0.42 | (-0.37, 1.28) | 1.08 | 0.280 |
| BAIT Images × AI Knowledge | -0.36 | 0.57 | (-1.48, 0.76) | -0.63 | 0.530 |
glm(Feedback_LabelsReversed ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_LabelsReversed = ifelse(Feedback_LabelsReversed=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Labels are reversed'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -1.26 | 2.45 | (-6.16, 3.48) | -0.52 | 0.606 |
| BAIT Images | -1.24 | 3.39 | (-8.04, 5.28) | -0.36 | 0.716 |
| AI Knowledge | -0.10 | 0.71 | (-1.49, 1.28) | -0.14 | 0.889 |
| BAIT Images × AI Knowledge | 7.05e-03 | 0.99 | (-1.89, 1.95) | 7.15e-03 | 0.994 |
glm(Feedback_CouldDiscriminate ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_CouldDiscriminate = ifelse(Feedback_CouldDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Easy to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | 0.18 | 2.45 | (-4.80, 4.86) | 0.07 | 0.942 |
| BAIT Images | -2.88 | 3.29 | (-9.34, 3.59) | -0.88 | 0.381 |
| AI Knowledge | -0.99 | 0.74 | (-2.43, 0.46) | -1.34 | 0.180 |
| BAIT Images × AI Knowledge | 1.16 | 0.96 | (-0.74, 3.03) | 1.20 | 0.231 |
glm(Feedback_CouldNotDiscriminate ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_CouldNotDiscriminate = ifelse(Feedback_CouldNotDiscriminate=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Hard to discriminate'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -3.25 | 1.65 | (-6.58, -0.10) | -1.97 | 0.049 |
| BAIT Images | 5.28 | 2.24 | (1.03, 9.84) | 2.36 | 0.018 |
| AI Knowledge | 0.03 | 0.46 | (-0.87, 0.95) | 0.07 | 0.943 |
| BAIT Images × AI Knowledge | -0.33 | 0.62 | (-1.57, 0.88) | -0.53 | 0.593 |
glm(Feedback_Fun ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_Fun = ifelse(Feedback_Fun=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Fun'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.58 | 1.46 | (-3.46, 2.29) | -0.40 | 0.693 |
| BAIT Images | 1.57 | 1.98 | (-2.29, 5.51) | 0.79 | 0.429 |
| AI Knowledge | 0.13 | 0.41 | (-0.68, 0.94) | 0.31 | 0.756 |
| BAIT Images × AI Knowledge | -0.22 | 0.55 | (-1.32, 0.86) | -0.40 | 0.686 |
glm(Feedback_Boring ~ BAIT_Images * AI_Knowledge,
data=mutate(df, Feedback_Boring = ifelse(Feedback_Boring=="True", 1, 0)),
family="binomial") |>
parameters::parameters() |>
display(title="Predicting 'Boring'")| Parameter | Log-Odds | SE | 95% CI | z | p |
|---|---|---|---|---|---|
| (Intercept) | -0.91 | 1.81 | (-4.51, 2.62) | -0.50 | 0.616 |
| BAIT Images | -1.41 | 2.51 | (-6.41, 3.46) | -0.56 | 0.574 |
| AI Knowledge | 0.09 | 0.49 | (-0.88, 1.07) | 0.18 | 0.856 |
| BAIT Images × AI Knowledge | 0.11 | 0.68 | (-1.23, 1.45) | 0.16 | 0.873 |
write.csv(df, "../data/data_participants.csv", row.names = FALSE)
write.csv(dftask, "../data/data.csv", row.names = FALSE)
Comments
Code